
*Start from an empty session and turn off --more-- pauses so the do-file runs straight through
clear all
set more off
***************************************************************************************************
**************************Do Event Study/D-in-D Analysis******************************************
***************************************************************************************************
*Inputs: the datasets merged below to build the main analysis dataset
*1. "VA_demographics.dta"
*2. "lausd_distance_chars_bound_data.dta"
*3. "VA_test_scores.dta"
*4. "school_stability_rate.dta"
*5. "class_size_teacher_data.dta"

*Outputs (in order):
*1. Figure 4
*2. Figure B.7
*3. Table B.2

***************************************************************************************************
***************************************************************************************************
***************************************************************************************************

*The "merge" step is redone here so that all schools are kept (schools >10 miles away were previously dropped to save memory)

*Step 1: build a school-level file flagging the RD sample and the treated schools
*(so the D-in-D can later be restricted to the RD sample). Only 2016 records are used.
clear all
set more off
use if year==2016 using "/data_analysis/Eliso_Complete/Data/VA_demographics.dta"
*Attach school characteristics; rows that exist only in the school file are not kept
merge m:1 locationcode using "/data_analysis/Eliso_Complete/Data/lausd_distance_chars_bound_data.dta", keep(master match)
drop nces_id name multi_lingual principalemail schcdscode _merge
*Running variable: computed distance re-centred at 5.05
gen running_var = distance_calc - 5.05
*RD sample = running variable within +/-1.75 of the cutoff (a missing distance yields 0);
*location 5902 is always excluded
gen RD_sample = inrange(running_var, -1.75, 1.75) & locationcode!=5902
*treat is 1 where air_filter==1 and missing everywhere else
gen treat = 1 if air_filter==1
*One row per school, holding the within-school means of the two flags
collapse (mean) RD_sample treat, by(locationcode)
save "/data_analysis/Eliso_Complete/Data/super_temp.dta", replace

*Step 2: restart from the full demographics file and attach the school-level flags
clear all
set more off
use "/data_analysis/Eliso_Complete/Data/VA_demographics.dta"
merge m:1 locationcode using "/data_analysis/Eliso_Complete/Data/super_temp.dta", nogenerate
drop schcdscode
*The temporary flag file is deleted once it has been merged in
erase  "/data_analysis/Eliso_Complete/Data/super_temp.dta"
*Schools with no flag value get 0 for both indicators
foreach flag of varlist treat RD_sample {
    replace `flag' = 0 if `flag'==.
}
*2017 records are not used
keep if year!=2017

*Attach test scores by student-year; score rows without a demographics record are not kept
merge 1:1 stdpseudoid year using "/data_analysis/Eliso_Complete/Data/VA_test_scores.dta", keep(master match) nogenerate
*Build a single grade variable: testing_grade where it is present, otherwise the original grade
*Note testing_grade and grade are identical in RD sample
rename grade grade2
gen grade = cond(testing_grade==., grade2, testing_grade)
drop grade2 testing_grade

*Attach school location data (rows found only in the school file are not kept),
*then remove the school-level variables that are not used further down
merge m:1 locationcode using "/data_analysis/Eliso_Complete/Data/lausd_distance_chars_bound_data.dta", keep(master match) nogenerate
drop nces_id name school_name air_filter distance_official distance_calc ///
     evacuated receive_evac low_grade high_grade charter charter_type ///
     gifted_program magnet gifted_magnet program multi_lingual principalemail ///
     bound_seg bound_seg2

*Build the control variables
*Demographic controls: ell code, gender, ethnicity, age interacted with grade, frl, class and home language
*The three string codes are converted to labelled numeric variables and the strings removed
encode langclscode, gen(class_lang)
encode homelangdescr, gen(home_lang)
encode ellevelcode, gen(el_code)
drop langclscode homelangdescr ellevelcode
gen age2 = age*age
*Missing parental education and ethnicity are assigned their own codes (7 and 10)
replace pared = 7 if pared==.
replace ethnicity = 10 if ethnicity==.

*Open-enrollment indicator (e.g., not attending attendance zone school)
gen open_enroll = locationcode!=schreslocationcode
*These score variables are not used below
drop math_score ela_score prof_level_math
quietly compress
*School-by-year stability rate; rows found only in the using file are not kept
merge m:1 locationcode year using "/data_analysis/Eliso_Complete/Data/school_stability_rate.dta", keep(master match) nogenerate
*Class Size and Teacher Exp
*NOTE(review): unlike every other input in this file, this path has no "Data/" folder - confirm it is intended
merge m:1 locationcode year using "/data_analysis/Eliso_Complete/class_size_teacher_data.dta", keep(master match) nogenerate
*Flag missing class sizes, then fill them with the mean of the observed values
gen missing_class_size = class_size==.
summarize class_size
replace class_size = r(mean) if class_size==.

*valid_math / valid_ela: 1 when both the current and the lagged score are observed
gen valid_math = !(math_scorez==. | lag_math==.)
gen valid_ela = !(ela_scorez==. | lag_ela==.)
*Flag missing lagged ELA scores and set them to 0
gen ela_missing = lag_ela==.
replace lag_ela = 0 if ela_missing==1

*Create an indicator for having switched schools from prior year and distance to that switch (for referee comment)
tsset stdpseudoid year
*moved_school: 0 = same school as the prior year, 1 = different school, 2 = no prior-year record.
*A relational expression in Stata is never missing: a missing lag simply compares as "not equal".
*The "no prior-year record" category therefore has to be assigned from the lag itself;
*testing moved_school==. can never match and would leave those students coded as movers (1).
gen moved_school=(locationcode!=L.locationcode)
replace moved_school=2 if L.locationcode==.
*Prior-year school coordinates, used to measure how far the student moved
gen lagged_lat=L.latitude
gen lagged_lon=L.longitude
*geodist is a user-written command and must be installed; missing distances are set to 0
geodist latitude longitude lagged_lat lagged_lon, gen(dist_move)
replace dist_move=0 if dist_move==.
*NOTE(review): as with moved_school, a missing value on either side makes this comparison true - confirm that is intended
gen within_year_move=(schlocationcode!=schcode_spring)
qui compress

*Control vectors (stored as locals)
*Demographic controls
local demo_controls "i.el_code i.gender i.ethnicity i.frl i.class_lang i.home_lang i.pared open_enroll"
*School controls
*NOTE(review): an earlier comment here described these as "(affiliated) charter, magnet", but the list holds the class-size variables plus below5/above10
local school_controls "missing_class_size class_size below5 above10"
*Lagged test-score controls: cubic in lagged math and lagged ELA, each interacted with grade
*(an earlier comment here read "Lat-lon space controls", which does not match these locals)
local test_controls_math "i.grade#c.lag_math i.grade#c.lag_math#c.lag_math i.grade#c.lag_math#c.lag_math#c.lag_math i.grade#c.lag_ela i.grade#c.lag_ela#c.lag_ela i.grade#c.lag_ela#c.lag_ela#c.lag_ela i.grade i.ela_missing#i.grade#c.lag_math"
*NOTE(review): math_missing is not created anywhere in the visible script - confirm it exists before using this local
local test_controls_ela "i.grade#c.lag_math i.grade#c.lag_math#c.lag_math i.grade#c.lag_math#c.lag_math#c.lag_math i.grade#c.lag_ela i.grade#c.lag_ela#c.lag_ela i.grade#c.lag_ela#c.lag_ela#c.lag_ela i.grade i.math_missing#i.grade#c.lag_ela"
*Keep only observations with a usable math score; grade-2 scores after 2013 are treated as unusable
drop if math_scorez==. | (grade==2 & year>2013)
quietly compress

*Post-period indicator: 2016 onwards
gen post = year>=2016

*Turn RD Sample On/Off
*keep if RD_sample==1


***********************************************************************************************
**************************Figure 4*************************************************************
***********************************************************************************************
*Figure 4: event-study plot of the year-by-treatment coefficients on math scores, relative to 2013.
*The working data are preserved because the estimates are stored in scratch rows and collapsed.
preserve
gen beta=.
gen lcl=.
gen ucl=.
*t is a row index; row 1+i holds the estimate for year 201i
gen t=_n
*areg math_scorez  ib(2013).year##i.treat i.grade##i.year missing_class_size class_size below5 above10 i.locationcode i.stdzipcode if grade<=5 & year>=2010 & year!=2015 & RD_sample==1, absorb(stdpseudoid) cluster(locationcode) 
reghdfe math_scorez ib(2013).year##i.treat i.grade##i.year missing_class_size class_size below5 above10 if grade<=5 & year>=2010 & year!=2015, absorb(stdpseudoid locationcode stdzipcode) cluster(locationcode) 

*The omitted base year 2013 (event time -1) is plotted at zero with no interval. Its row (t==4)
*is seeded here so it survives the drop below. Previously the row was assigned with
*"replace t = -1 in 5" after the collapse, when only four rows exist, which stops with an
*"observation numbers out of range" error.
replace beta=0 if t==4

*Point estimates with analytic 95% intervals
foreach i of numlist 0 1 2 6{
replace beta=_b[201`i'.year#1.treat] if t==1+`i'
replace lcl=_b[201`i'.year#1.treat] - 1.96*_se[201`i'.year#1.treat] if t==1+`i'
replace ucl=_b[201`i'.year#1.treat] +1.96*_se[201`i'.year#1.treat] if t==1+`i'
}
*Replace the analytic intervals with wild-cluster-bootstrap intervals from boottest
foreach i of numlist 0 1 2 6{
boottest  201`i'.year#1.treat, boottype(wild) reps(999) cluster(locationcode) seed(1234) nograph 
mat C=r(CI)
*svmat writes the interval bounds into variables C1 (lower) and C2 (upper)
svmat C
su C1
local c1=r(mean)
su C2
local c2=r(mean)
replace lcl=`c1'  if t==1+`i'
replace ucl=`c2'  if t==1+`i'
drop C1 C2
}
*Keep only the rows that hold an estimate (2010, 2011, 2012, base year 2013, 2016)
drop if beta==.
collapse  beta lcl ucl, by(t)
*Convert the row index into event time (2016 = 0, 2013 = -1, ..., 2010 = -4)
replace t=0 if t==7
replace t=-1 if t==4
replace t=-2 if t==3
replace t=-3 if t==2
replace t=-4 if t==1
sort t

graph twoway (connected  beta t) (rcap lcl ucl t, lcolor(maroon)), xtitle("Event Time Relative to Air Filter Delivery") ytitle("Estimated Coefficient (Math Scores)") legend(off) yline(0, lcolor(black) lwidth(thick)) xline(-0.5, lpattern(dash) lcolor(black)) ylabel(-.25 "-0.25" -.125 "-0.125" 0 "0" 0.125 "0.125" 0.25 "0.25") xlabel(-4(1)0) graphregion(color(white)) bgcolor(white)
restore
***********************************************************************************************
***********************************************************************************************
***********************************************************************************************

*Check: pooled D-in-D (post x treat) counterparts of the event-study specification
local did_rhs "i.post##i.treat i.grade##i.year missing_class_size class_size below5 above10"
local did_if "grade<=5 & year>=2010 & year!=2015"
*All schools: student, school and zip-code fixed effects absorbed
reghdfe math_scorez  `did_rhs' if `did_if', absorb(stdpseudoid locationcode stdzipcode) cluster(locationcode) 
*RD sample only: student fixed effects absorbed, school and zip-code effects entered as dummies
areg math_scorez  `did_rhs' i.locationcode i.stdzipcode if `did_if' & RD_sample==1, absorb(stdpseudoid) cluster(locationcode) 
*Wild-cluster-bootstrap inference on the D-in-D coefficient from the areg model above
boottest  1.post#1.treat, boottype(wild) reps(999) cluster(locationcode) seed(1234) nograph 





***********************************************************************************************
**************************Figure B.7***********************************************************
***********************************************************************************************

*Figure Changing Number of pre-years
*Re-estimates the D-in-D with the sample starting in each year 2004-2013 and plots the
*post x treat coefficient against the number of pre-years used.
*The section is wrapped in preserve/restore: it keeps a subsample and then collapses the data,
*which would otherwise leave nothing for the analyses that follow (e.g. RD_sample would be gone).
preserve
keep if grade<=5
keep if year>=2004
*t is a row index; row (start year - 2003) holds the estimate for that start year
gen t=_n
gen beta=.
gen lcl=.
gen ucl=.
*post normally already exists at this point, and a plain "gen post" then stops with
*"variable post already defined"; drop any existing copy before recreating it
capture drop post
gen post=(year>=2016)
*NOTE(review): presumably below5/above10 are not available for 2007 and earlier - confirm why they are zeroed
replace below5=0 if year<=2007
replace above10=0 if year<=2007
foreach start of numlist 2004(1)2013{
*areg math_scorez  i.post##i.treat i.grade##i.year missing_class_size class_size below5 above10 i.locationcode i.stdzipcode if grade<=5 & year>=`start' & year!=2015 & RD_sample==1, absorb(stdpseudoid) cluster(locationcode)
reghdfe math_scorez  i.post##i.treat i.grade##i.year missing_class_size class_size below5 above10 if grade<=5 & year>=`start' & year!=2015, absorb(stdpseudoid locationcode stdzipcode) cluster(locationcode) 
*Point estimate with analytic 95% interval
replace beta=_b[1.post#1.treat] if t==`start'-2003 
replace lcl=_b[1.post#1.treat] - 1.96*_se[1.post#1.treat] if t==`start'-2003
replace ucl=_b[1.post#1.treat] +1.96*_se[1.post#1.treat] if t==`start'-2003

*Optional wild-bootstrap intervals (disabled)
*boottest  1.post#1.treat, boottype(wild) reps(999) cluster(locationcode) seed(1234) nograph 
*mat C=r(CI)
*svmat C
*su C1
*local c1=r(mean)
*su C2
*local c2=r(mean)
*replace lcl=`c1'  if t==`start'-2003
*replace ucl=`c2'  if t==`start'-2003
*drop C1 C2
}

*Keep only the ten rows that hold an estimate
drop if beta==.
collapse  beta lcl ucl, by(t)
ren t n
*Row n=1 is the 2004 start (10 pre-years) and row n=10 is the 2013 start (1 pre-year)
gen t=11-n

graph twoway (connected  beta t) (rcap lcl ucl t, lcolor(maroon)), xtitle("Number of Pre-Years Used") ytitle("Estimated Coefficient (Math Scores)") legend(off) yline(0, lcolor(black) lwidth(thick)) xline(4, lpattern(dash) lcolor(black)) ylabel(-.1 "-0.10" 0 "0" 0.1 "0.10" 0.20 "0.20" 0.30 "0.30") xlabel(1(1)10) graphregion(color(white)) bgcolor(white)
restore

***********************************************************************************************
***********************************************************************************************
***********************************************************************************************




***********************************************************************************************
***************************Table B.2***********************************************************
***********************************************************************************************
*Table B.2: D-in-D on math scores with the school-move controls added
preserve
*Toggle on/off below line for 1st or 2nd row
keep if RD_sample==1
*Right-hand side and estimation options shared by all three columns
local b2_rhs "i.post##i.treat i.grade##i.year  i.moved_school dist_move i.within_year_move"
local b2_opts "absorb(stdpseudoid locationcode stdzipcode) cluster(locationcode)"
*Column (1): grades up to 5
reghdfe math_scorez `b2_rhs' if grade<=5 & year>=2010 & year!=2015, `b2_opts'
*Only run if restricted to RD Sample (e.g., keep if RD_sample==1)
*boottest  1.post#1.treat, boottype(wild) reps(999) cluster(locationcode) seed(1234) nograph 
*Column (2): as column (1) but excluding location 2117
reghdfe math_scorez `b2_rhs' if grade<=5 & year>=2010 & year!=2015 & locationcode!=2117, `b2_opts'
*Column (3): grades up to 8
reghdfe math_scorez `b2_rhs' if grade<=8 & year>=2010 & year!=2015, `b2_opts'
***********************************************************************************************
***********************************************************************************************
***********************************************************************************************


